{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 抓取163博客日志\n",
    "\n",
    "- 作者：DGC'Idea\n",
    "- 版本：V0.1   \n",
    "- 更新日期：2018年4月14日\n",
    "\n",
    "----------\n",
    "\n",
    "- 研究目的：获取163博客某个博主的日志；\n",
    "- 研究方法：从163博客网站爬取博主所有日志。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 已准备好!\n"
     ]
    }
   ],
   "source": [
    "#引入包\n",
    "import pandas as pd\n",
    "import requests,re,time\n",
    "\n",
    "# 自定义\n",
    "import sys\n",
    "sys.path.append('/home/jquser')\n",
    "\n",
    "from dm import *\n",
    "from tl import exists_file\n",
    "\n",
    "\n",
    "# 数据目录\n",
    "_data_path='../../Data/'\n",
    "# 数据引擎\n",
    "_data=Csv(_data_path)\n",
    "\n",
    "\n",
    "\n",
    "# 163博客返回的为dwr数据格式\n",
    "# 必须采用headers请求\n",
    "# 配置param\n",
    "# requests.post(url,data=param,headers=headers)\n",
    "\n",
    "# 页面请求头\n",
    "headers={ \n",
    "    'Accept':'*/*',\n",
    "    'Accept-Encoding':'gzip, deflate',\n",
    "    'Accept-Language':'zh-CN,zh;q=0.8',\n",
    "    'Connection':'keep-alive',\n",
    "    'Content-Length':'274',\n",
    "    'Content-Type':'text/plain',\n",
    "    'Host':'api.blog.163.com',\n",
    "    'Origin':'http://api.blog.163.com',\n",
    "    'Referer':'http://api.blog.163.com/crossdomain.html?t=20100205',\n",
    "    'User-Agent':'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.104 \\\n",
    "    Safari/537.36 Core/1.53.4620.400 QQBrowser/9.7.13014.400}  ',\n",
    "}\n",
    "   \n",
    "# 请求参数\n",
    "param={ \n",
    "    'callCount':'1',\n",
    "    'scriptSessionId':'${scriptSessionId}187',\n",
    "    'c0-scriptName':'BlogBeanNew',\n",
    "    'c0-methodName':'getBlogs',\n",
    "    'c0-id':'0',\n",
    "    'c0-param0':'number:60278066',#用户id\n",
    "    'c0-param1':'number:0',#第几条\n",
    "    'c0-param2':'number:1',#一次几条\n",
    "    'batchId':'318074',\n",
    "    }\n",
    "\n",
    "# 返回的数据格式\n",
    "\n",
    "# //#DWR-REPLY\n",
    "# var s0={};var s1={};var s2=[];s0.abstractSysGen=1;s0.accessCount=5;s0.allowComment=-100;s0.allowView=-100;\n",
    "# s0.blogAbstract=\"<P>\\u76F8\\u805A\\u4E0D\\u77E5\\u6101\\u6ECB\\u5473\\uFF0C</P>\\r\\n<P>\\u6B22\\u7B11\\u65E0\\u5FE7\\uFF0C</P>\\r\\n<P>\n",
    "# \\u6B22\\u7B11\\u65E0\\u5FE7\\uFF0C</P>\\r\\n<P>\\u6E38\\u620F\\u7EC8\\u65E5\\u5F3A\\u8BF4\\u6101\\u3002</P>\\r\\n<P>&nbsp;</P>\\r\\n<P>\n",
    "# \\u522B\\u540E\\u65B9\\u77E5\\u6101\\u6ECB\\u5473\\uFF0C</P>\\r\\n<P>\\u6B32\\u8BF4\\u8FD8\\u4F11\\uFF0C</P>\\r\\n<P>\n",
    "# \\u6B32\\u8BF4\\u8FD8\\u4F11\\uFF0C</P>\\r\\n<P>\\u5374\\u9053\\u6625\\u522B\\u805A\\u65F6\\u79CB\\u3002</P>\";\n",
    "# s0.blogAttachments=null;s0.blogCount=s1;s0.blogExt=null;s0.circleCount=0;s0.circleIdList=s2;s0.circleIds=null;\n",
    "# s0.classId=\"fks_087064093095089071093087082095085087080071082081085067085\";s0.className=\"C.\\u8BCD\";s0.commentCount=0;\n",
    "# s0.comments=null;s0.content=\"<P>\\u76F8\\u805A\\u4E0D\\u77E5\\u6101\\u6ECB\\u5473\\uFF0C</P>\\r\\n<P>\\u6B22\\u7B11\\u65E0\\u5FE7\\uFF0C\n",
    "# </P>\\r\\n<P>\\u6B22\\u7B11\\u65E0\\u5FE7\\uFF0C</P>\\r\\n<P>\\u6E38\\u620F\\u7EC8\\u65E5\\u5F3A\\u8BF4\\u6101\\u3002</P>\\r\\n<P>&nbsp;\n",
    "# </P>\\r\\n<P>\\u522B\\u540E\\u65B9\\u77E5\\u6101\\u6ECB\\u5473\\uFF0C</P>\\r\\n<P>\\u6B32\\u8BF4\\u8FD8\\u4F11\\uFF0C</P>\\r\\n<P>\n",
    "# \\u6B32\\u8BF4\\u8FD8\\u4F11\\uFF0C</P>\\r\\n<P>\\u5374\\u9053\\u6625\\u522B\\u805A\\u65F6\\u79CB\\u3002</P>\";\n",
    "# s0.contentPlainText=null;s0.id=\"fks_082067085083086066084094094095085087080071082081085067085\";s0.ip=\"221.130.168.7\";\n",
    "# s0.isBlogAbstractComplete=true;s0.isPublished=1;s0.keyName=\"ID\";s0.keyWordCheckedState=0;\n",
    "# s0.lastAccessCountUpdateTime=1523684498270;s0.matchedKeyWord=false;s0.modifyTime=0;s0.moveFrom=\"NONE\";\n",
    "# s0.permaSerial=\"1114673012009225115942319\";s0.permalink=\"blog/static/1114673012009225115942319\";s0.photoIds=null;\n",
    "# s0.photoStoreTypes=null;s0.publishTime=1237953582319;s0.publishTimeStr=\"11:59:42\";s0.publisherId=0;\n",
    "# s0.publisherNickname=null;s0.publisherUsername=null;s0.rank=0;s0.recomBlogHome=false;s0.ref=false;\n",
    "# s0.shortPublishDateStr=\"2009-3-25\";s0.synchLofter=-1;s0.synchMiniBlog=-1;s0.tag=\"\";s0.title=\"\\u91C7\\u6851\\u5B50\";\n",
    "# s0.trackbackCount=0;s0.trackbackUrl=\"blog/1114673012009225115942319.track\";s0.userId=111467301;s0.userName=\"dgczy@126\";\n",
    "# s0.userNickname=\"dgczy\";s0.valid=0;s0.zipContent=null;\n",
    "# s1.accessCount=5;s1.blogId=401571088;s1.commentCount=0;s1.likeCount=0;s1.mainCommentCount=0;\n",
    "# s1.permaSerial=\"1114673012009225115942319\";s1.postHot=0;s1.recommendCount=0;s1.trackbackCount=0;s1.userId=111467301;\n",
    "# dwr.engine._remoteHandleCallback('318074','0',[s0]);\n",
    "\n",
    "\n",
    "# 获取微博发帖\n",
    "def get_blog_data(user_name,user_id,count,begin=0):\n",
    "    #请求链接\n",
    "    url='http://api.blog.163.com/%s/dwr/call/plaincall/BlogBeanNew.getBlogs.dwr'%(user_name)\n",
    "    #配置用户id\n",
    "    param['c0-param0']='number:%s'%(user_id)\n",
    "    #数据结构\n",
    "    df=pd.DataFrame()\n",
    "    data_list=[]\n",
    "    columns=['title','publishdate','permalink','content','sended']\n",
    "    #遍历所有发帖   \n",
    "    for i in range(begin,count):\n",
    "        #配置每次获取日志的序号\n",
    "        param['c0-param1']='number:%s'%str(i)\n",
    "        #请求数据\n",
    "        r=requests.post(url,data=param,headers=headers)\n",
    "        #返回失败，退出循环\n",
    "        if (r.ok is False) :\n",
    "            break\n",
    "        r=r.text \n",
    "        #unicode转换为中文\n",
    "        r=r.encode('utf8').decode('unicode_escape') \n",
    "        print(re.split(\"\"\"(?<!\\\"[^;]+);(?![^;]+\\\")\"\"\", r))\n",
    "        1/0\n",
    "        #解析标题\n",
    "        title=r.split('s0.title=\"')[1].split('\";')[0]\n",
    "        #解析发布日期\n",
    "        publishdate=r.split('s0.shortPublishDateStr=\"')[1].split('\";')[0]\n",
    "        #解析发布时间\n",
    "        publishtime=r.split('s0.publishTimeStr=\"')[1].split('\";')[0]\n",
    "        #静态网页网址\n",
    "        permalink=r.split('s0.permalink=\"')[1].split('\";')[0] \n",
    "        #正文\n",
    "        content=r.split('s0.content=\"')[1].split('\";')[0] \n",
    "        #添加信息到数据表\n",
    "        data_list.append([title,'%s %s'%(publishdate,publishtime),permalink,content,False])\n",
    "        #延时5秒，防止被踢\n",
    "#         time.sleep(2)\n",
    "        print(' 已获取：第 %s 条，%s，%s %s'%(str(i),title,publishdate,publishtime),end='\\r')\n",
    "    #生成数据表    \n",
    "    df=pd.DataFrame(data_list,columns=columns)   \n",
    "    #返回数据\n",
    "    return df\n",
    "\n",
    "\n",
    "# 博主信息网址格式\n",
    "# http://blog.163.com/dgczy@126/blog/#m=0\n",
    "\n",
    "# 网页中博主信息\n",
    "# UD.host = {\n",
    "#       userId:111467301\n",
    "#      ,userName:'dgczy@126'\n",
    "#      ,nickName:'dgczy'\n",
    "#      ,imageUpdateTime:-1\n",
    "#      ,baseUrl:'http://blog.163.com/dgczy@126/'\n",
    "#      ,gender:'他'\n",
    "#      ,email:'dgczy@126.com'\n",
    "#      ,photo163Name:'dgczy@126'\n",
    "#      ,photo163HostName:'dgczy@126'\n",
    "#      ,TOKEN_HTMLMODULE:''\n",
    "#      ,isMultiUserBlog:false\n",
    "#      ,isWumiUser:true\n",
    "#      ,sRank:-100\n",
    "#   };\n",
    "\n",
    "# 网页中分类信息，计算count后的数字，得到所有日志数量\n",
    "# c:[{id:'-2',name:'草稿箱',count:0},{id:'-3',name:'回收站',count:0},\n",
    "# {id:\"fks_087071085083088066093082087095085087080071082081085067085\",name:\"A.日记\",count:4},\n",
    "# {id:\"fks_087064093095089071093086083095085087080071082081085067085\",name:\"B.家\",count:4},\n",
    "# {id:\"fks_087064093095089071093087082095085087080071082081085067085\",name:\"C.词\",count:20},\n",
    "# {id:\"fks_087064093095089071093087085095085087080071082081085067085\",name:\"D.诗歌集\",count:0},\n",
    "# {id:\"fks_087064093095089071093086095095085087080071082081085067085\",name:\"E.密码学初探\",count:1},\n",
    "# {id:\"fks_087064093095089071093086080095085087080071082081085067085\",name:\"F.阿芒思选\",count:1},\n",
    "# {id:\"fks_087071086087083070080086087095085087080071082081085067085\",name:\"G.篆刻\",count:1},\n",
    "# {id:\"fks_087070085084083069081083083095085087080071082081085067085\",name:\"H.设计作品\",count:0},\n",
    "# {id:\"fks_087071086086081065085086080095085087080071082081085067085\",name:\"H.文章转载\",count:2},\n",
    "# {id:\"fks_087064093095088067087081087095085087080071082081085067085\",name:\"默认分类\",count:0}],\n",
    "\n",
    "#获取博主信息\n",
    "def get_blog_userinfo(user_name):\n",
    "    #所有帖子链接（该网页含有用户信息及帖子数）    \n",
    "    url='http://blog.163.com/%s/blog/#m=0'%(user_name)\n",
    "    #请求数据\n",
    "    ret=requests.get(url).text\n",
    "    #获取用户信息\n",
    "    r=ret.split('UD.host = {')[1].split('};')[0]\n",
    "    r=r.replace(\"'\",\"\").replace(\" \",\"\").replace(\"\\r\\n\",\"\")\n",
    "    #解析用户信息\n",
    "    user_id=r.split('userId:')[1].split(',')[0]\n",
    "    user_name=r.split('userName:')[1].split(',')[0]\n",
    "    user_nickname=r.split('nickName:')[1].split(',')[0]\n",
    "    user_baseurl=r.split('baseUrl:')[1].split(',')[0]\n",
    "    user_email=r.split('email:')[1].split(',')[0]\n",
    "    user_gender=r.split('gender:')[1].split(',')[0]\n",
    "    user_gender='男' if user_gender=='他' else '女' \n",
    "    #解析帖子数量\n",
    "    r=ret.split('c:[')[1].split('],')[0]\n",
    "    r=re.findall(r\"count:(.*?)}\",r,re.S)\n",
    "    user_count=0\n",
    "    for i in r[2:]:\n",
    "        user_count+=int(i)\n",
    "    return (user_id,user_name,user_nickname,user_baseurl,user_email,user_gender,user_count)\n",
    "\n",
    "\n",
    "#读取数据        \n",
    "def read_blog_data(user_id):\n",
    "    #文件名\n",
    "    data_file='msg_blog_'+user_id\n",
    "    #从cvs文件恢复数据\n",
    "    df=_data.read(data_file)\n",
    "    return df\n",
    "\n",
    "#保存数据        \n",
    "def save_blog_data(user_id,df):\n",
    "    #文件名\n",
    "    data_file='msg_blog_'+user_id\n",
    "    #保存到cvs文件\n",
    "    _data.save(data_file,df,append=False)\n",
    "\n",
    "\n",
    "#博客博主注册名\n",
    "user_list=[\n",
    "    'gaoya',\n",
    "    ]\n",
    "\n",
    "print(' 已准备好!') "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {},
   "outputs": [
    {
     "ename": "error",
     "evalue": "look-behind requires fixed-width pattern",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31merror\u001b[0m                                     Traceback (most recent call last)",
      "\u001b[1;32m<ipython-input-41-2ed0c04104e7>\u001b[0m in \u001b[0;36m<module>\u001b[1;34m()\u001b[0m\n\u001b[1;32m----> 1\u001b[1;33m \u001b[0mdf\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mget_blog_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0muser_name\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0muser_id\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0muser_count\u001b[0m\u001b[1;33m,\u001b[0m\u001b[0mbegin\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m      2\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m<ipython-input-40-3b8398ba04da>\u001b[0m in \u001b[0;36mget_blog_data\u001b[1;34m(user_name, user_id, count, begin)\u001b[0m\n\u001b[0;32m     86\u001b[0m         \u001b[1;31m#unicode转换为中文\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     87\u001b[0m         \u001b[0mr\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mencode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'utf8'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdecode\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m'unicode_escape'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m---> 88\u001b[1;33m         \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mre\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"\"\"(?<!\\\"[^;]+);(?![^;]+\\\")\"\"\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mr\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m     89\u001b[0m         \u001b[1;36m1\u001b[0m\u001b[1;33m/\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m     90\u001b[0m         \u001b[1;31m#解析标题\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/opt/conda/lib/python3.4/re.py\u001b[0m in \u001b[0;36msplit\u001b[1;34m(pattern, string, maxsplit, flags)\u001b[0m\n\u001b[0;32m    198\u001b[0m     \u001b[1;32mand\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mremainder\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mstring\u001b[0m \u001b[1;32mis\u001b[0m \u001b[0mreturned\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mfinal\u001b[0m \u001b[0melement\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    199\u001b[0m     of the list.\"\"\"\n\u001b[1;32m--> 200\u001b[1;33m     \u001b[1;32mreturn\u001b[0m \u001b[0m_compile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msplit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mstring\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mmaxsplit\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    201\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    202\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0mfindall\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mstring\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;36m0\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/opt/conda/lib/python3.4/re.py\u001b[0m in \u001b[0;36m_compile\u001b[1;34m(pattern, flags)\u001b[0m\n\u001b[0;32m    292\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0msre_compile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0misstring\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    293\u001b[0m         \u001b[1;32mraise\u001b[0m \u001b[0mTypeError\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"first argument must be string or compiled pattern\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 294\u001b[1;33m     \u001b[0mp\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0msre_compile\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mcompile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mpattern\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    295\u001b[0m     \u001b[1;32mif\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[0mbypass_cache\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    296\u001b[0m         \u001b[1;32mif\u001b[0m \u001b[0mlen\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0m_cache\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;33m>=\u001b[0m \u001b[0m_MAXCACHE\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/opt/conda/lib/python3.4/sre_compile.py\u001b[0m in \u001b[0;36mcompile\u001b[1;34m(p, flags)\u001b[0m\n\u001b[0;32m    570\u001b[0m         \u001b[0mpattern\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    571\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 572\u001b[1;33m     \u001b[0mcode\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_code\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mp\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    573\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    574\u001b[0m     \u001b[1;31m# print code\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/opt/conda/lib/python3.4/sre_compile.py\u001b[0m in \u001b[0;36m_code\u001b[1;34m(p, flags)\u001b[0m\n\u001b[0;32m    555\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    556\u001b[0m     \u001b[1;31m# compile the pattern\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 557\u001b[1;33m     \u001b[0m_compile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdata\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    558\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    559\u001b[0m     \u001b[0mcode\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mOPCODES\u001b[0m\u001b[1;33m[\u001b[0m\u001b[0mSUCCESS\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;32m/opt/conda/lib/python3.4/sre_compile.py\u001b[0m in \u001b[0;36m_compile\u001b[1;34m(code, pattern, flags)\u001b[0m\n\u001b[0;32m    162\u001b[0m                 \u001b[0mlo\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mhi\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mav\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mgetwidth\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    163\u001b[0m                 \u001b[1;32mif\u001b[0m \u001b[0mlo\u001b[0m \u001b[1;33m!=\u001b[0m \u001b[0mhi\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 164\u001b[1;33m                     \u001b[1;32mraise\u001b[0m \u001b[0merror\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"look-behind requires fixed-width pattern\"\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m    165\u001b[0m                 \u001b[0memit\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mlo\u001b[0m\u001b[1;33m)\u001b[0m \u001b[1;31m# look behind\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m    166\u001b[0m             \u001b[0m_compile\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mcode\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mav\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;36m1\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
      "\u001b[1;31merror\u001b[0m: look-behind requires fixed-width pattern"
     ]
    }
   ],
   "source": [
    "df=get_blog_data(user_name,user_id,user_count,begin=0)\n",
    "\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1.博主信息"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " ID　　  60278066\n",
      " 注册名  gaoya\n",
      " 昵称　  Peter\n",
      " 网址　  http://gaoya.blog.163.com/\n",
      " 邮箱　  gaoya@163.com\n",
      " 性别　  男\n",
      " 日志数  505\n"
     ]
    }
   ],
   "source": [
    "# 获取博主信息\n",
    "(user_id,user_name,user_nickname,user_baseurl,user_email,user_gender,user_count)=get_blog_userinfo(user_list[0])\n",
    "print(' ID　　 ',user_id)\n",
    "print(' 注册名 ',user_name)\n",
    "print(' 昵称　 ',user_nickname)\n",
    "print(' 网址　 ',user_baseurl)\n",
    "print(' 邮箱　 ',user_email)\n",
    "print(' 性别　 ',user_gender)\n",
    "print(' 日志数 ',user_count)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.爬取数据\n",
    "- 采用断点续爬。"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "没有新帖子或更新不成功！\n"
     ]
    }
   ],
   "source": [
    "#断点续爬数据\n",
    "def update_blog_data(user_name,user_id,user_count):\n",
    "    #检查是否已存在数据\n",
    "    try:\n",
    "        #已存在数据\n",
    "        df=read_blog_data(user_id)\n",
    "        #起始位置为当前记录数\n",
    "        new_df=get_blog_data(user_name,user_id,user_count,begin=len(df)) \n",
    "        if len(new_df)==0:\n",
    "            print('没有新帖子或更新不成功！')\n",
    "        else:\n",
    "            #拼接新旧数据\n",
    "            df=pd.concat([df,new_df],ignore_index=True)\n",
    "            #保存到cvs文件\n",
    "            save_blog_data(user_id,df) \n",
    "            print(' 共更新：%s 条帖子'%len(new_df))    \n",
    "    except:\n",
    "        #数据不存在，起始位置为0 \n",
    "        df=get_blog_data(user_name,user_id,user_count,begin=0) \n",
    "        if len(df)==0:\n",
    "            print('获取不成功！')\n",
    "        else:\n",
    "            #保存到cvs文件\n",
    "            save_blog_data(user_id,df) \n",
    "            print(' 共获取：%s 条帖子'%len(df))\n",
    "    \n",
    "update_blog_data(user_name,user_id,user_count)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.发送邮件"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " 发送完毕！\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/opt/conda/lib/python3.4/site-packages/pandas/core/indexing.py:115: SettingWithCopyWarning: \n",
      "A value is trying to be set on a copy of a slice from a DataFrame\n",
      "\n",
      "See the the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
      "  self._setitem_with_indexer(indexer, value)\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "#引入自定义（jsl）库\n",
    "#保存当前目录\n",
    "path=os.getcwd()\n",
    "#更改到用户根目录为\n",
    "os.chdir(\"/home/jquser/\")\n",
    "#导入自己的库\n",
    "from mail import *\n",
    "#恢复当前目录\n",
    "os.chdir(path)\n",
    "\n",
    "#发送邮件\n",
    "def send_blog_data(user_id):\n",
    "    #读取数据\n",
    "    df=read_blog_data(user_id)\n",
    "    issend=False\n",
    "    for i in range(0,len(df)):\n",
    "        if df['sended'].iloc[i]==True:\n",
    "            continue\n",
    "        #邮件主题\n",
    "        subjiect='%s（博客日志 %s）'%(df.title.iloc[i],user_name.upper())\n",
    "        #邮件正文\n",
    "        message='<h2>%s</h2><p>%s</br>%s</p><hr /></br>%s'%(df.title.iloc[i],df.publishdate.iloc[i],\n",
    "                                               user_baseurl+df.permalink.iloc[i],df.content.iloc[i])\n",
    "        try:\n",
    "            #发送邮件\n",
    "            send_html_qqmail(subjiect,message)\n",
    "            #发送成功标志\n",
    "            df['sended'].iloc[i]=True\n",
    "            issend=True\n",
    "            #延时5秒，防止被踢\n",
    "            time.sleep(5)\n",
    "            print(' 已发送：%s'%(subjiect),end='\\r')\n",
    "        except Exception as e:\n",
    "            print(' 错误：%s'%e)\n",
    "            break\n",
    "    if issend==True:\n",
    "        #保存到cvs文件\n",
    "        save_blog_data(user_id,df) \n",
    "        print(' 发送完毕！')\n",
    "    else:\n",
    "        print(' 没有要发送的！')\n",
    "        \n",
    "send_blog_data(user_id)     "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4.产看数据表"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>title</th>\n",
       "      <th>publishdate</th>\n",
       "      <th>permalink</th>\n",
       "      <th>content</th>\n",
       "      <th>sended</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>再说资产配置</td>\n",
       "      <td>2014-3-23 21:43:47</td>\n",
       "      <td>blog/static/6027806620142239384860</td>\n",
       "      <td>刚才看到有博友问红利etf是否值得投资。那我就总体说说怎么根据资产配置原则选择etf。&lt;di...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>各种量化投资计划电梯</td>\n",
       "      <td>2014-1-4 13:14:14</td>\n",
       "      <td>blog/static/6027806620140405224549</td>\n",
       "      <td>在资本市场，成功的投资一般三种方式：&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;价值投...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>送给新朋友的几句话</td>\n",
       "      <td>2016-1-26 10:47:10</td>\n",
       "      <td>blog/static/602780662016026104710643</td>\n",
       "      <td>最近雪球、微博的观光团朋友们纷纷来看这个记录了本人几年投资历程的博客。&lt;wbr&gt;&lt;div&gt;&lt;...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>油气网格开始</td>\n",
       "      <td>2015-7-15 9:34:32</td>\n",
       "      <td>blog/static/60278066201561593432546</td>\n",
       "      <td>分级A回款到账，现在有钱又有闲。开始搞油气。&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>很多东西不在这里写了。朋友可以去雪球关注</td>\n",
       "      <td>2015-7-10 10:03:26</td>\n",
       "      <td>blog/static/60278066201561010326313</td>\n",
       "      <td>etf拯救世界&lt;wbr&gt;</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>还有没有A类亏损的朋友了？</td>\n",
       "      <td>2015-6-25 14:45:23</td>\n",
       "      <td>blog/static/60278066201552524523151</td>\n",
       "      <td>站出来，我保证不打你。&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;&lt;img title...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>大家误会了</td>\n",
       "      <td>2015-6-24 10:33:56</td>\n",
       "      <td>blog/static/602780662015524102551795</td>\n",
       "      <td>我不是说评论里谁说什么了我不高兴之类的。。大家还不了解我么，我还不了解大家么，而且，我有那么...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>看了上一贴的评论，感觉是万万没想到</td>\n",
       "      <td>2015-6-23 14:11:00</td>\n",
       "      <td>blog/static/6027806620155232110140</td>\n",
       "      <td>以后在这里说品种，大概有两个选择：&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;第一，少...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>同学们，来来来，都冒个泡</td>\n",
       "      <td>2015-6-19 13:43:04</td>\n",
       "      <td>blog/static/6027806620155191434417</td>\n",
       "      <td>在4500点，上证跌了600点之后，本博主在A类赚了一笔。资产增加得很愉快。&lt;wbr&gt;&lt;di...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>欢迎添加公众号</td>\n",
       "      <td>2015-6-10 20:50:37</td>\n",
       "      <td>blog/static/60278066201551085037391</td>\n",
       "      <td>欢迎添加本博公众号，及时接收最新文章&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;微信扫一扫&lt;/d...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>201506：本月卖出3份，买入2份</td>\n",
       "      <td>2015-6-1 9:09:15</td>\n",
       "      <td>blog/static/602780662015519915313</td>\n",
       "      <td>卖出：&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;2份100etf&lt;/div&gt;&lt;div...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>持仓A类一览</td>\n",
       "      <td>2015-5-30 16:03:14</td>\n",
       "      <td>blog/static/6027806620154304123867</td>\n",
       "      <td>每天都会有新的A类暴跌到开枪区域，现在已经持仓以下A类，如果有新的品种加入，会更新本文。&lt;d...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>今天有场大战役/没搞满！</td>\n",
       "      <td>2015-5-28 6:28:53</td>\n",
       "      <td>blog/static/60278066201542862853369</td>\n",
       "      <td>为了避免当年同瑞b说太早结果没达到最理想收益的情况，今天先不说。&lt;div&gt;&lt;br&gt;&lt;/div...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>每天a类精彩大戏不断，没精力一个个说了</td>\n",
       "      <td>2015-5-27 9:55:09</td>\n",
       "      <td>blog/static/6027806620154279559303</td>\n",
       "      <td>大家自己去集思录这个页面看，我说几个原则：&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>写在这个伟大的日子</td>\n",
       "      <td>2015-5-26 13:13:29</td>\n",
       "      <td>blog/static/60278066201542611329659</td>\n",
       "      <td>投资整整12年。&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;经历过多次凋敝的萧瑟，经历...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>本人现阶段资产配置表</td>\n",
       "      <td>2015-5-25 9:45:16</td>\n",
       "      <td>blog/static/60278066201542594516260</td>\n",
       "      <td>供大家参考，讨论。&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;这个资产配置，现在还在配置期。配置...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>卖出部分华安石油</td>\n",
       "      <td>2015-5-15 9:29:46</td>\n",
       "      <td>blog/static/60278066201541592946797</td>\n",
       "      <td>卖出一部分华安石油。华宝油气底仓未动。&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;腾出...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>开始关注A类</td>\n",
       "      <td>2015-5-8 10:34:19</td>\n",
       "      <td>blog/static/6027806620154810341917</td>\n",
       "      <td>150219 &amp;nbsp;健康A &amp;nbsp;+4.5 目前平价，收益率7%。&lt;wbr&gt;&lt;d...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>18</th>\n",
       "      <td>本月卖出3份</td>\n",
       "      <td>2015-5-4 8:44:42</td>\n",
       "      <td>blog/static/6027806620154484442834</td>\n",
       "      <td>卖出2份100etf&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;卖出1份180etf&lt;...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>19</th>\n",
       "      <td>门票目标，2015第一季全出炉</td>\n",
       "      <td>2015-4-30 9:32:04</td>\n",
       "      <td>blog/static/6027806620153309324578</td>\n",
       "      <td>原则：&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;10年计划，表中所列为过去十年，每个半年利润都...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>20</th>\n",
       "      <td>门票策略盘（更新完毕）</td>\n",
       "      <td>2015-4-24 13:15:59</td>\n",
       "      <td>blog/static/60278066201532411559682</td>\n",
       "      <td>&lt;table border=\"0\"   cellpadding=\"0\"   cellspac...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>21</th>\n",
       "      <td>动态再平衡组合</td>\n",
       "      <td>2015-4-23 13:11:43</td>\n",
       "      <td>blog/static/60278066201532311143939</td>\n",
       "      <td>今天在雪球做了个动态再平衡组合。基本代表了我的态度和想法。&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/d...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>22</th>\n",
       "      <td>在这个浮躁的时候，推荐大家看两本书</td>\n",
       "      <td>2015-4-22 8:40:26</td>\n",
       "      <td>blog/static/60278066201532284026394</td>\n",
       "      <td>以前推荐过一些书。而在这个浮躁的时候，再推荐两本最经典的&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;di...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>23</th>\n",
       "      <td>权作留念</td>\n",
       "      <td>2015-4-14 12:35:50</td>\n",
       "      <td>blog/static/60278066201531403341668</td>\n",
       "      <td>打新以来，赚钱不是最多，但涨停最多的一只。&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;没有之一。...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>24</th>\n",
       "      <td>即将进入死亡之顶</td>\n",
       "      <td>2015-4-13 15:53:25</td>\n",
       "      <td>blog/static/60278066201531335325899</td>\n",
       "      <td>全市场pb 5.5&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;死亡区域。&lt;/div&gt;&lt;...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>25</th>\n",
       "      <td>复杂系统中，会发生很多不可预计的事情</td>\n",
       "      <td>2015-4-9 15:04:46</td>\n",
       "      <td>blog/static/602780662015393446777</td>\n",
       "      <td>H股A照常理很大概率会被套利盘砸的谁都不认识。为什么没发生呢。&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>26</th>\n",
       "      <td>关注H股A</td>\n",
       "      <td>2015-4-8 14:04:58</td>\n",
       "      <td>blog/static/60278066201538245886</td>\n",
       "      <td>H股A+B整体溢价17%，可想而知套利大军快到了。B类由于港股预期极度亢奋，A类被干的机会非...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>27</th>\n",
       "      <td>不玩了。关注A股的同学近期不必来这里了</td>\n",
       "      <td>2015-4-2 16:34:31</td>\n",
       "      <td>blog/static/6027806620153243431373</td>\n",
       "      <td>明天清仓。不玩了。&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;为啥？pb破5了。&lt;br&gt;&lt;div...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>28</th>\n",
       "      <td>大风起兮猪飞扬</td>\n",
       "      <td>2015-3-31 16:18:33</td>\n",
       "      <td>blog/static/60278066201523141011580</td>\n",
       "      <td>话不多说，看图说话：&lt;wbr&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;1、全市场pe与深综指...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>29</th>\n",
       "      <td>同学们，你们账户里真的清干净了？</td>\n",
       "      <td>2015-3-24 10:54:07</td>\n",
       "      <td>blog/static/60278066201522410547990</td>\n",
       "      <td>看评论，不少同学都清了。&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;警惕性高是好事，但是，&lt;sp...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>...</th>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "      <td>...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>475</th>\n",
       "      <td>又捐了</td>\n",
       "      <td>2006-8-28 9:50:13</td>\n",
       "      <td>blog/static/60278066200672895013408</td>\n",
       "      <td>&lt;div&gt;这次给山区的小朋友捐了我的不列颠少儿百科全书。一套四册精装的。。。&lt;/div&gt;&lt;d...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>476</th>\n",
       "      <td>周末，我家窗外...</td>\n",
       "      <td>2006-8-21 21:22:22</td>\n",
       "      <td>blog/static/60278066200672192222701</td>\n",
       "      <td>&lt;div&gt;&lt;/div&gt;&lt;table cellspacing=\"0\"  border=\"0\" ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>477</th>\n",
       "      <td>停止黑嘴</td>\n",
       "      <td>2006-7-3 9:21:56</td>\n",
       "      <td>blog/static/6027806620066392156812</td>\n",
       "      <td>&lt;div&gt;&lt;font face=\"黑体, Simhei\" size=\"4\"&gt;今天起不在这个地...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>478</th>\n",
       "      <td>我会买的股票</td>\n",
       "      <td>2006-6-28 8:58:20</td>\n",
       "      <td>blog/static/60278066200652885820353</td>\n",
       "      <td>&lt;div align=\"center\"&gt;&lt;font face=\"黑体, Simhei\" si...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>479</th>\n",
       "      <td>下周会大跌</td>\n",
       "      <td>2006-6-17 10:09:29</td>\n",
       "      <td>blog/static/60278066200651710929208</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"&gt;空仓一周，出去办事。&lt;/font&gt;&lt;/div&gt;&lt;di...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>480</th>\n",
       "      <td>1/3了</td>\n",
       "      <td>2006-6-2 22:34:57</td>\n",
       "      <td>blog/static/60278066200652103457434</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"&gt;今天，给赵景华汇出了今年的第三笔捐款。前五个月的捐款...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>481</th>\n",
       "      <td>半仓了</td>\n",
       "      <td>2006-5-26 17:20:42</td>\n",
       "      <td>blog/static/60278066200642652042599</td>\n",
       "      <td>&lt;div&gt;&lt;table cellspacing=\"0\" cellpadding=\"0\" al...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>482</th>\n",
       "      <td>比较开心</td>\n",
       "      <td>2006-5-26 11:39:56</td>\n",
       "      <td>blog/static/602780662006426113956964</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"&gt;上午出去办事，中午回来，昨天买入的王府井挣了10%，...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>483</th>\n",
       "      <td>一切按照剧本行动</td>\n",
       "      <td>2006-5-24 15:07:47</td>\n",
       "      <td>blog/static/6027806620064243747997</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"&gt;今天继续昨天的大跌。&lt;/font&gt;&lt;/div&gt;&lt;di...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>484</th>\n",
       "      <td>我爱基金</td>\n",
       "      <td>2006-5-15 10:07:56</td>\n",
       "      <td>blog/static/6027806620064151075683</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"&gt;今天最新数据&lt;/font&gt;&lt;/div&gt;&lt;div&gt;&lt;f...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>485</th>\n",
       "      <td>五一游记</td>\n",
       "      <td>2006-5-8 9:18:23</td>\n",
       "      <td>blog/static/6027806620064891823379</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"  &gt;出游两日，人困狗乏。回来后巴蒂睡了4天，东西都不...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>486</th>\n",
       "      <td>88888</td>\n",
       "      <td>2006-4-24 10:32:51</td>\n",
       "      <td>blog/static/602780662006324103251562</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"&gt;学习了！&lt;/font&gt;&lt;/div&gt;&lt;div&gt;&amp;nbs...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>487</th>\n",
       "      <td>支持HH</td>\n",
       "      <td>2006-3-30 13:37:55</td>\n",
       "      <td>blog/static/60278066200623013755299</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"&gt;为了支持HH，买了一本 一座城池 。用实际行动支持H...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>488</th>\n",
       "      <td>CPI</td>\n",
       "      <td>2006-3-20 9:01:08</td>\n",
       "      <td>blog/static/602780662006220918379</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"&gt;2月份的CPI出来了，0.9。前两个月累计增幅1.4...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>489</th>\n",
       "      <td>写字ing</td>\n",
       "      <td>2006-3-14 17:07:34</td>\n",
       "      <td>blog/static/6027806620062145734145</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"5\"&gt;一篇家居杂志的。&lt;/font&gt;&lt;/div&gt;&lt;div&gt;...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>490</th>\n",
       "      <td>看电视，无语</td>\n",
       "      <td>2006-3-9 15:49:47</td>\n",
       "      <td>blog/static/6027806620062934947258</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"5\"&gt;那天吃饭看凤凰卫视采访一个上海女的：&lt;/font&gt;&lt;...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>491</th>\n",
       "      <td>乌云盖顶</td>\n",
       "      <td>2006-3-7 15:19:13</td>\n",
       "      <td>blog/static/6027806620062731913272</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"4\"&gt;今天的走势，就是外国人说的乌云盖顶了。&lt;/font&gt;...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>492</th>\n",
       "      <td>今日DCSX指数</td>\n",
       "      <td>2006-3-6 8:33:20</td>\n",
       "      <td>blog/static/6027806620062683320669</td>\n",
       "      <td>&lt;div&gt;今日指数344&lt;/div&gt;</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>493</th>\n",
       "      <td>随便说说股票</td>\n",
       "      <td>2006-3-2 16:15:35</td>\n",
       "      <td>blog/static/6027806620062241535101</td>\n",
       "      <td>&lt;div&gt;&lt;font size=\"3\"&gt;股票本身有2种获利途径：1 投资这家公司，伴随这家公...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>494</th>\n",
       "      <td>乱弹历史——四大发明之一：活字印刷术</td>\n",
       "      <td>2006-2-9 9:16:24</td>\n",
       "      <td>blog/static/602780662006199162490</td>\n",
       "      <td>&lt;div align=\"center\"&gt;&lt;font color=\"#0000ff\" size...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>495</th>\n",
       "      <td>google,我服了你了</td>\n",
       "      <td>2006-1-19 11:03:59</td>\n",
       "      <td>blog/static/60278066200601911359994</td>\n",
       "      <td>&lt;div&gt;&lt;/div&gt;&lt;table cellspacing=\"0\"  border=\"0\" ...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>496</th>\n",
       "      <td>吃一堑要长一智</td>\n",
       "      <td>2005-12-27 13:25:26</td>\n",
       "      <td>blog/static/602780662005112712526199</td>\n",
       "      <td>&lt;div&gt;上一篇东西被新浪财经拿去放到了首页，结果有人就发现文章中的错误了~&lt;/div&gt;&lt;d...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>497</th>\n",
       "      <td>联合国说</td>\n",
       "      <td>2005-10-12 13:09:27</td>\n",
       "      <td>blog/static/6027806620059121927922</td>\n",
       "      <td>&lt;div&gt;联合国开发计划署《2005年人类发展报告》指出，如果把上海比作一个国家的话，其人类...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>498</th>\n",
       "      <td>人生 II</td>\n",
       "      <td>2005-7-6 13:10:34</td>\n",
       "      <td>blog/static/6027806620056611034922</td>\n",
       "      <td>&lt;div&gt;　　视爱情为奢侈品：有最好，没有也能活。&amp;nbsp;&lt;br /&gt;　　签任何合同之前...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>499</th>\n",
       "      <td>人生 I</td>\n",
       "      <td>2005-7-6 13:09:46</td>\n",
       "      <td>blog/static/602780662005661946821</td>\n",
       "      <td>&lt;div&gt;&lt;/div&gt;</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>500</th>\n",
       "      <td>无股可买的巴菲特  去年赚了73亿美元</td>\n",
       "      <td>2005-3-7 10:22:20</td>\n",
       "      <td>blog/static/6027806620052710222094</td>\n",
       "      <td>&lt;p&gt;&amp;nbsp;&amp;nbsp;&amp;nbsp; 我的偶像之一，股神巴菲特，在上一个年度中，由于再...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>501</th>\n",
       "      <td>大跌眼镜</td>\n",
       "      <td>2005-2-22 15:19:29</td>\n",
       "      <td>blog/static/60278066200512231929635</td>\n",
       "      <td>&lt;p&gt;早上被我讽刺的&lt;strong&gt;&lt;font color=\"#ffff33\"  &gt;三木&lt;/...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>502</th>\n",
       "      <td>财神助威</td>\n",
       "      <td>2005-2-21 15:15:19</td>\n",
       "      <td>blog/static/60278066200512131519212</td>\n",
       "      <td>&lt;p&gt;今年第一单，买入后当日涨了6%，第二日出手，但是量太小了，没有什么赚头。华源制药&lt;/p...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>503</th>\n",
       "      <td>所谓人生好时节...</td>\n",
       "      <td>2005-2-21 14:42:07</td>\n",
       "      <td>blog/static/6027806620051212427903</td>\n",
       "      <td>春有百花&lt;br /&gt;秋望月&lt;br /&gt;夏有凉风&lt;br /&gt;冬听雪&lt;br /&gt;心中若无烦恼事&lt;...</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>504</th>\n",
       "      <td>oops...</td>\n",
       "      <td>2005-2-21 14:30:43</td>\n",
       "      <td>blog/static/60278066200512123043697</td>\n",
       "      <td>&lt;p&gt;这是一个新玩艺儿。。。&lt;/p&gt;</td>\n",
       "      <td>True</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>505 rows × 5 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "                    title          publishdate  \\\n",
       "0                  再说资产配置   2014-3-23 21:43:47   \n",
       "1              各种量化投资计划电梯    2014-1-4 13:14:14   \n",
       "2               送给新朋友的几句话   2016-1-26 10:47:10   \n",
       "3                  油气网格开始    2015-7-15 9:34:32   \n",
       "4    很多东西不在这里写了。朋友可以去雪球关注   2015-7-10 10:03:26   \n",
       "5           还有没有A类亏损的朋友了？   2015-6-25 14:45:23   \n",
       "6                   大家误会了   2015-6-24 10:33:56   \n",
       "7       看了上一贴的评论，感觉是万万没想到   2015-6-23 14:11:00   \n",
       "8            同学们，来来来，都冒个泡   2015-6-19 13:43:04   \n",
       "9                 欢迎添加公众号   2015-6-10 20:50:37   \n",
       "10     201506：本月卖出3份，买入2份     2015-6-1 9:09:15   \n",
       "11                 持仓A类一览   2015-5-30 16:03:14   \n",
       "12           今天有场大战役/没搞满！    2015-5-28 6:28:53   \n",
       "13    每天a类精彩大戏不断，没精力一个个说了    2015-5-27 9:55:09   \n",
       "14              写在这个伟大的日子   2015-5-26 13:13:29   \n",
       "15             本人现阶段资产配置表    2015-5-25 9:45:16   \n",
       "16               卖出部分华安石油    2015-5-15 9:29:46   \n",
       "17                 开始关注A类    2015-5-8 10:34:19   \n",
       "18                 本月卖出3份     2015-5-4 8:44:42   \n",
       "19        门票目标，2015第一季全出炉    2015-4-30 9:32:04   \n",
       "20            门票策略盘（更新完毕）   2015-4-24 13:15:59   \n",
       "21                动态再平衡组合   2015-4-23 13:11:43   \n",
       "22      在这个浮躁的时候，推荐大家看两本书    2015-4-22 8:40:26   \n",
       "23                   权作留念   2015-4-14 12:35:50   \n",
       "24               即将进入死亡之顶   2015-4-13 15:53:25   \n",
       "25     复杂系统中，会发生很多不可预计的事情    2015-4-9 15:04:46   \n",
       "26                  关注H股A    2015-4-8 14:04:58   \n",
       "27    不玩了。关注A股的同学近期不必来这里了    2015-4-2 16:34:31   \n",
       "28                大风起兮猪飞扬   2015-3-31 16:18:33   \n",
       "29       同学们，你们账户里真的清干净了？   2015-3-24 10:54:07   \n",
       "..                    ...                  ...   \n",
       "475                   又捐了    2006-8-28 9:50:13   \n",
       "476            周末，我家窗外...   2006-8-21 21:22:22   \n",
       "477                  停止黑嘴     2006-7-3 9:21:56   \n",
       "478                我会买的股票    2006-6-28 8:58:20   \n",
       "479                 下周会大跌   2006-6-17 10:09:29   \n",
       "480                  1/3了    2006-6-2 22:34:57   \n",
       "481                   半仓了   2006-5-26 17:20:42   \n",
       "482                  比较开心   2006-5-26 11:39:56   \n",
       "483              一切按照剧本行动   2006-5-24 15:07:47   \n",
       "484                  我爱基金   2006-5-15 10:07:56   \n",
       "485                  五一游记     2006-5-8 9:18:23   \n",
       "486                 88888   2006-4-24 10:32:51   \n",
       "487                  支持HH   2006-3-30 13:37:55   \n",
       "488                   CPI    2006-3-20 9:01:08   \n",
       "489                 写字ing   2006-3-14 17:07:34   \n",
       "490                看电视，无语    2006-3-9 15:49:47   \n",
       "491                  乌云盖顶    2006-3-7 15:19:13   \n",
       "492              今日DCSX指数     2006-3-6 8:33:20   \n",
       "493                随便说说股票    2006-3-2 16:15:35   \n",
       "494    乱弹历史——四大发明之一：活字印刷术     2006-2-9 9:16:24   \n",
       "495          google,我服了你了   2006-1-19 11:03:59   \n",
       "496               吃一堑要长一智  2005-12-27 13:25:26   \n",
       "497                  联合国说  2005-10-12 13:09:27   \n",
       "498                 人生 II    2005-7-6 13:10:34   \n",
       "499                  人生 I    2005-7-6 13:09:46   \n",
       "500   无股可买的巴菲特  去年赚了73亿美元    2005-3-7 10:22:20   \n",
       "501                  大跌眼镜   2005-2-22 15:19:29   \n",
       "502                  财神助威   2005-2-21 15:15:19   \n",
       "503            所谓人生好时节...   2005-2-21 14:42:07   \n",
       "504               oops...   2005-2-21 14:30:43   \n",
       "\n",
       "                                permalink  \\\n",
       "0      blog/static/6027806620142239384860   \n",
       "1      blog/static/6027806620140405224549   \n",
       "2    blog/static/602780662016026104710643   \n",
       "3     blog/static/60278066201561593432546   \n",
       "4     blog/static/60278066201561010326313   \n",
       "5     blog/static/60278066201552524523151   \n",
       "6    blog/static/602780662015524102551795   \n",
       "7      blog/static/6027806620155232110140   \n",
       "8      blog/static/6027806620155191434417   \n",
       "9     blog/static/60278066201551085037391   \n",
       "10      blog/static/602780662015519915313   \n",
       "11     blog/static/6027806620154304123867   \n",
       "12    blog/static/60278066201542862853369   \n",
       "13     blog/static/6027806620154279559303   \n",
       "14    blog/static/60278066201542611329659   \n",
       "15    blog/static/60278066201542594516260   \n",
       "16    blog/static/60278066201541592946797   \n",
       "17     blog/static/6027806620154810341917   \n",
       "18     blog/static/6027806620154484442834   \n",
       "19     blog/static/6027806620153309324578   \n",
       "20    blog/static/60278066201532411559682   \n",
       "21    blog/static/60278066201532311143939   \n",
       "22    blog/static/60278066201532284026394   \n",
       "23    blog/static/60278066201531403341668   \n",
       "24    blog/static/60278066201531335325899   \n",
       "25      blog/static/602780662015393446777   \n",
       "26       blog/static/60278066201538245886   \n",
       "27     blog/static/6027806620153243431373   \n",
       "28    blog/static/60278066201523141011580   \n",
       "29    blog/static/60278066201522410547990   \n",
       "..                                    ...   \n",
       "475   blog/static/60278066200672895013408   \n",
       "476   blog/static/60278066200672192222701   \n",
       "477    blog/static/6027806620066392156812   \n",
       "478   blog/static/60278066200652885820353   \n",
       "479   blog/static/60278066200651710929208   \n",
       "480   blog/static/60278066200652103457434   \n",
       "481   blog/static/60278066200642652042599   \n",
       "482  blog/static/602780662006426113956964   \n",
       "483    blog/static/6027806620064243747997   \n",
       "484    blog/static/6027806620064151075683   \n",
       "485    blog/static/6027806620064891823379   \n",
       "486  blog/static/602780662006324103251562   \n",
       "487   blog/static/60278066200623013755299   \n",
       "488     blog/static/602780662006220918379   \n",
       "489    blog/static/6027806620062145734145   \n",
       "490    blog/static/6027806620062934947258   \n",
       "491    blog/static/6027806620062731913272   \n",
       "492    blog/static/6027806620062683320669   \n",
       "493    blog/static/6027806620062241535101   \n",
       "494     blog/static/602780662006199162490   \n",
       "495   blog/static/60278066200601911359994   \n",
       "496  blog/static/602780662005112712526199   \n",
       "497    blog/static/6027806620059121927922   \n",
       "498    blog/static/6027806620056611034922   \n",
       "499     blog/static/602780662005661946821   \n",
       "500    blog/static/6027806620052710222094   \n",
       "501   blog/static/60278066200512231929635   \n",
       "502   blog/static/60278066200512131519212   \n",
       "503    blog/static/6027806620051212427903   \n",
       "504   blog/static/60278066200512123043697   \n",
       "\n",
       "                                               content sended  \n",
       "0    刚才看到有博友问红利etf是否值得投资。那我就总体说说怎么根据资产配置原则选择etf。<di...   True  \n",
       "1    在资本市场，成功的投资一般三种方式：<wbr><div><br></div><div>价值投...   True  \n",
       "2    最近雪球、微博的观光团朋友们纷纷来看这个记录了本人几年投资历程的博客。<wbr><div><...   True  \n",
       "3    分级A回款到账，现在有钱又有闲。开始搞油气。<wbr><div><br></div><div...   True  \n",
       "4                                         etf拯救世界<wbr>   True  \n",
       "5    站出来，我保证不打你。<wbr><div><br></div><div><img title...   True  \n",
       "6    我不是说评论里谁说什么了我不高兴之类的。。大家还不了解我么，我还不了解大家么，而且，我有那么...   True  \n",
       "7    以后在这里说品种，大概有两个选择：<wbr><div><br></div><div>第一，少...   True  \n",
       "8    在4500点，上证跌了600点之后，本博主在A类赚了一笔。资产增加得很愉快。<wbr><di...   True  \n",
       "9    欢迎添加本博公众号，及时接收最新文章<div><br></div><div>微信扫一扫</d...   True  \n",
       "10   卖出：<wbr><div><br></div><div>2份100etf</div><div...   True  \n",
       "11   每天都会有新的A类暴跌到开枪区域，现在已经持仓以下A类，如果有新的品种加入，会更新本文。<d...   True  \n",
       "12   为了避免当年同瑞b说太早结果没达到最理想收益的情况，今天先不说。<div><br></div...   True  \n",
       "13   大家自己去集思录这个页面看，我说几个原则：<wbr><div><br></div><div>...   True  \n",
       "14   投资整整12年。<wbr><div><br></div><div>经历过多次凋敝的萧瑟，经历...   True  \n",
       "15   供大家参考，讨论。<div><br></div><div>这个资产配置，现在还在配置期。配置...   True  \n",
       "16   卖出一部分华安石油。华宝油气底仓未动。<wbr><div><br></div><div>腾出...   True  \n",
       "17   150219 &nbsp;健康A &nbsp;+4.5 目前平价，收益率7%。<wbr><d...   True  \n",
       "18   卖出2份100etf<wbr><div><br></div><div>卖出1份180etf<...   True  \n",
       "19   原则：<div><br></div><div>10年计划，表中所列为过去十年，每个半年利润都...   True  \n",
       "20   <table border=\"0\"   cellpadding=\"0\"   cellspac...   True  \n",
       "21   今天在雪球做了个动态再平衡组合。基本代表了我的态度和想法。<wbr><div><br></d...   True  \n",
       "22   以前推荐过一些书。而在这个浮躁的时候，再推荐两本最经典的<div><br></div><di...   True  \n",
       "23   打新以来，赚钱不是最多，但涨停最多的一只。<div><br></div><div>没有之一。...   True  \n",
       "24   全市场pb 5.5<wbr><div><br></div><div>死亡区域。</div><...   True  \n",
       "25   H股A照常理很大概率会被套利盘砸的谁都不认识。为什么没发生呢。<wbr><div><br><...   True  \n",
       "26   H股A+B整体溢价17%，可想而知套利大军快到了。B类由于港股预期极度亢奋，A类被干的机会非...   True  \n",
       "27   明天清仓。不玩了。<div><br></div><div>为啥？pb破5了。<br><div...   True  \n",
       "28   话不多说，看图说话：<wbr><div><br></div><div>1、全市场pe与深综指...   True  \n",
       "29   看评论，不少同学都清了。<div><br></div><div>警惕性高是好事，但是，<sp...   True  \n",
       "..                                                 ...    ...  \n",
       "475  <div>这次给山区的小朋友捐了我的不列颠少儿百科全书。一套四册精装的。。。</div><d...   True  \n",
       "476  <div></div><table cellspacing=\"0\"  border=\"0\" ...   True  \n",
       "477  <div><font face=\"黑体, Simhei\" size=\"4\">今天起不在这个地...   True  \n",
       "478  <div align=\"center\"><font face=\"黑体, Simhei\" si...   True  \n",
       "479  <div><font size=\"3\">空仓一周，出去办事。</font></div><di...   True  \n",
       "480  <div><font size=\"3\">今天，给赵景华汇出了今年的第三笔捐款。前五个月的捐款...   True  \n",
       "481  <div><table cellspacing=\"0\" cellpadding=\"0\" al...   True  \n",
       "482  <div><font size=\"3\">上午出去办事，中午回来，昨天买入的王府井挣了10%，...   True  \n",
       "483  <div><font size=\"3\">今天继续昨天的大跌。</font></div><di...   True  \n",
       "484  <div><font size=\"3\">今天最新数据</font></div><div><f...   True  \n",
       "485  <div><font size=\"3\"  >出游两日，人困狗乏。回来后巴蒂睡了4天，东西都不...   True  \n",
       "486  <div><font size=\"3\">学习了！</font></div><div>&nbs...   True  \n",
       "487  <div><font size=\"3\">为了支持HH，买了一本 一座城池 。用实际行动支持H...   True  \n",
       "488  <div><font size=\"3\">2月份的CPI出来了，0.9。前两个月累计增幅1.4...   True  \n",
       "489  <div><font size=\"5\">一篇家居杂志的。</font></div><div>...   True  \n",
       "490  <div><font size=\"5\">那天吃饭看凤凰卫视采访一个上海女的：</font><...   True  \n",
       "491  <div><font size=\"4\">今天的走势，就是外国人说的乌云盖顶了。</font>...   True  \n",
       "492                                 <div>今日指数344</div>   True  \n",
       "493  <div><font size=\"3\">股票本身有2种获利途径：1 投资这家公司，伴随这家公...   True  \n",
       "494  <div align=\"center\"><font color=\"#0000ff\" size...   True  \n",
       "495  <div></div><table cellspacing=\"0\"  border=\"0\" ...   True  \n",
       "496  <div>上一篇东西被新浪财经拿去放到了首页，结果有人就发现文章中的错误了~</div><d...   True  \n",
       "497  <div>联合国开发计划署《2005年人类发展报告》指出，如果把上海比作一个国家的话，其人类...   True  \n",
       "498  <div>　　视爱情为奢侈品：有最好，没有也能活。&nbsp;<br />　　签任何合同之前...   True  \n",
       "499                                        <div></div>   True  \n",
       "500  <p>&nbsp;&nbsp;&nbsp; 我的偶像之一，股神巴菲特，在上一个年度中，由于再...   True  \n",
       "501  <p>早上被我讽刺的<strong><font color=\"#ffff33\"  >三木</...   True  \n",
       "502  <p>今年第一单，买入后当日涨了6%，第二日出手，但是量太小了，没有什么赚头。华源制药</p...   True  \n",
       "503  春有百花<br />秋望月<br />夏有凉风<br />冬听雪<br />心中若无烦恼事<...   True  \n",
       "504                                 <p>这是一个新玩艺儿。。。</p>   True  \n",
       "\n",
       "[505 rows x 5 columns]"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Read the stored blog posts for user_id into a DataFrame. read_blog_data and user_id\n",
    "# are defined in earlier cells; presumably this reads the saved file from the data\n",
    "# directory - TODO confirm against the helper's definition.\n",
    "df=read_blog_data(user_id)\n",
    "# Disabled maintenance step: would set the 'sended' column to False on every row and\n",
    "# write the frame back with save_blog_data. Leave commented out unless that reset is intended.\n",
    "# df['sended']=False\n",
    "# save_blog_data(user_id,df)\n",
    "# Bare last expression so the notebook renders the frame as this cell's output.\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.4.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
